{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from os import listdir\n",
    "from os.path import isfile, join\n",
    "import numpy as np\n",
    "import matplotlib.image as mpimg\n",
    "from skimage.transform import resize"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "#  Normal lung images (the *_pos lists in this notebook); every image gets label 0.\n",
    "mypath = \"../data/train/NORMAL/\"\n",
    "#  listdir() returns names in arbitrary order, so sort them: the sample order of\n",
    "#  the saved array is then reproducible across runs and machines.\n",
    "onlyfiles = sorted(f for f in listdir(mypath) if isfile(join(mypath, f)))\n",
    "\n",
    "X_train_pos = []\n",
    "for im_name in onlyfiles:\n",
    "    #  Match the real extension, not any name that merely contains \".jpeg\".\n",
    "    if im_name.endswith(\".jpeg\"):\n",
    "        #  imread already returns a numpy array, so no extra np.array() copy is needed.\n",
    "        img = mpimg.imread(join(mypath, im_name))\n",
    "        #  Rescale height/width to 256x256 so all images can be stacked later.\n",
    "        img = resize(img, (256, 256))\n",
    "        X_train_pos.append(img)\n",
    "\n",
    "#  One label per loaded image.\n",
    "y_train_pos = [0] * len(X_train_pos)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "#  Lung images with pneumonia (the *_neg lists in this notebook); every image gets label 1.\n",
    "mypath = \"../data/train/PNEUMONIA/\"\n",
    "#  listdir() returns names in arbitrary order, so sort them: the sample order of\n",
    "#  the saved array is then reproducible across runs and machines.\n",
    "onlyfiles = sorted(f for f in listdir(mypath) if isfile(join(mypath, f)))\n",
    "\n",
    "X_train_neg = []\n",
    "for im_name in onlyfiles:\n",
    "    #  Match the real extension, not any name that merely contains \".jpeg\".\n",
    "    if im_name.endswith(\".jpeg\"):\n",
    "        #  imread already returns a numpy array, so no extra np.array() copy is needed.\n",
    "        img = mpimg.imread(join(mypath, im_name))\n",
    "        #  Rescale height/width to 256x256 so all images can be stacked later.\n",
    "        img = resize(img, (256, 256))\n",
    "        X_train_neg.append(img)\n",
    "\n",
    "#  One label per loaded image.\n",
    "y_train_neg = [1] * len(X_train_neg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#  Some images have three channels that merely duplicate the grayscale channel,\n",
    "#  so keep only the first one. resize(img, (256, 256)) keeps the channel axis LAST,\n",
    "#  i.e. such an image is (256, 256, channels), so the channel is selected with\n",
    "#  [..., 0]. The earlier reshape(-1, 256, 256)[0] did not pick a channel: it\n",
    "#  reinterpreted the first 256*256 interleaved values as an image and scrambled it.\n",
    "#  Both classes are normalised so np.stack below never receives mixed shapes.\n",
    "X_train_pos = [im[..., 0] if im.ndim > 2 else im for im in X_train_pos]\n",
    "X_train_neg = [im[..., 0] if im.ndim > 2 else im for im in X_train_neg]\n",
    "\n",
    "#  Every list element is now a 256x256 array, so each class can be stacked\n",
    "#  into a single (n_images, 256, 256) matrix.\n",
    "x_pos = np.stack(X_train_pos, axis=0)\n",
    "x_neg = np.stack(X_train_neg, axis=0)\n",
    "labels_pos = np.stack(y_train_pos, axis=0)\n",
    "labels_neg = np.stack(y_train_neg, axis=0)\n",
    "\n",
    "#  The X train array is the normal images followed by the pneumonia images\n",
    "x_train = np.concatenate((x_pos, x_neg))\n",
    "#  and the Y train array holds the labels in the same order.\n",
    "y_train = np.concatenate((labels_pos, labels_neg))\n",
    "\n",
    "#  Sanity checks: single-channel 256x256 images, exactly one label per image.\n",
    "assert x_train.shape[1:] == (256, 256), x_train.shape\n",
    "assert len(x_train) == len(y_train), (len(x_train), len(y_train))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "#  Write the image matrix and its label vector to disk as .npy files.\n",
    "for out_path, arr in (('../data/x_train.npy', x_train),\n",
    "                      ('../data/y_train.npy', y_train)):\n",
    "    np.save(out_path, arr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
